import re
import urllib

def getHtml(url):
    """Download *url* and return its body decoded from GB2312.

    Args:
        url: Address of the page to fetch, passed straight to
            ``urllib.urlopen`` (the Python 2 ``urllib`` API).

    Returns:
        The response body as text, decoded with the ``gb2312`` codec.

    Raises:
        UnicodeDecodeError: If the body contains bytes that are not valid
            GB2312 (decoding is strict).
    """
    page = urllib.urlopen(url)
    try:
        # Read and decode inside the try so the connection is released even
        # when the transfer or the decoding fails.
        html = page.read().decode("gb2312")
    finally:
        page.close()
    # Single-argument parenthesised print behaves the same as the print
    # statement under Python 2.
    print("finish uploading")
    return html

def getImg(html):
    """Return the link text of every post anchor found in *html*.

    Despite its name, this function does not look for images: it collects
    the text between ``">`` and ``</a>`` for each anchor whose target
    contains ``file=M.<10 digits>.A``.

    Args:
        html: Page markup to scan.

    Returns:
        A list with the captured anchor text of each match, in document
        order; an empty list when nothing matches.
    """
    print("start parsing")
    # Raw string so ``\d`` reaches the regex engine untouched, and escaped
    # dots so only a literal "." is accepted around the 10-digit id.
    return re.findall(r'file=M\.\d{10}\.A">(.*?)</a>', html)
   
# Fetch the job-board listing page. (A commented-out alternative target,
# http://news.sina.com.cn/china, was previously kept here.)
html = getHtml(
    "http://bbs.ustc.edu.cn/cgi/bbstdoc?board=Job&start=6900"
)

# Report the length of the decoded page.
print(len(html))

imgdata = getImg(html)

# Print each extracted entry on its own line with every '&nbsp'
# occurrence stripped out.
for entry in imgdata:
    cleaned = entry.replace('&nbsp', '')
    print(cleaned)